/**
 * @date 2015-12-22 18:24:56
 * @version V1.0
 */
package MLDA_Process;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;

import Commons.IOUtil;

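/**
 * @ClassName: MedSTC
 * @Description: Converts a whitespace-tokenized text file into per-line
 *               sparse "index:count" records written to "&lt;path&gt;.MedSTC".
 * @author ffftzh
 * @date 2015-12-22 18:24:56
 *
 */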
public class MedSTC {
    /** Input file used by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "D:\\javaEE\\MLDA\\data2\\weibo\\SogouCAReduced.txt.data";

    /**
     * Rewrites a whitespace-tokenized text file as sparse {@code index:count} records.
     *
     * <p>Each input line produces exactly one output line in the file
     * {@code path + ".MedSTC"}, laid out as
     * {@code <distinctWords> 0 <index>:<count> <index>:<count> ...}. Word indices are
     * 1-based and assigned in order of first appearance across the whole file. The
     * order of the {@code index:count} pairs within a line follows the iteration
     * order of a {@link HashMap}, not the order of the tokens in the line.
     *
     * <p>Empty tokens (produced by {@code split} for blank lines or lines that start
     * with whitespace) are ignored, so they never receive a vocabulary index. A line
     * without any word is still written, as {@code "0 0 "}, so that input and output
     * stay aligned line by line.
     *
     * <p>NOTE(review): the constant second field {@code 0} is presumably a placeholder
     * label expected by the downstream MedSTC tool - confirm against its input format.
     *
     * @param path path of the input file; the output is written next to it with the
     *             suffix {@code .MedSTC}
     * @throws IOException if reading the input or writing the output fails
     */
    public static void data_format(String path) throws IOException {
        // Vocabulary shared by all lines: word -> 1-based index.
        HashMap<String, Integer> wordIndex = new HashMap<String, Integer>();
        // Per-line scratch structures, cleared at the start of every iteration.
        HashMap<String, Integer> wordCount = new HashMap<String, Integer>();
        // Kept as ArrayList because the parameter type of Commons.utils.join is not visible here.
        ArrayList<String> outputLine = new ArrayList<String>();

        // try-with-resources closes both streams even when an IOException aborts the loop.
        try (BufferedReader reader = IOUtil.getReader(path);
                BufferedWriter writer = IOUtil.getWriter(path + ".MedSTC")) {
            String line;
            while ((line = reader.readLine()) != null) {
                outputLine.clear();
                wordCount.clear();

                for (String word : line.split("\\s+")) {
                    if (word.isEmpty()) {
                        // Leading whitespace / blank line artefact of split(), not a real word.
                        continue;
                    }
                    if (!wordIndex.containsKey(word)) {
                        wordIndex.put(word, wordIndex.size() + 1);
                    }
                    Integer seen = wordCount.get(word);
                    wordCount.put(word, seen == null ? 1 : seen + 1);
                }

                writer.write(wordCount.size() + " 0 ");
                for (String word : wordCount.keySet()) {
                    outputLine.add(wordIndex.get(word) + ":" + wordCount.get(word));
                }
                // Echo of the raw input line, kept from the original implementation.
                System.out.println(line);
                if (!outputLine.isEmpty()) {
                    // join is skipped for empty records: its behavior on an empty list is unknown.
                    writer.write(Commons.utils.join(outputLine, " "));
                }
                writer.write("\n");
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the input file to convert. When absent,
     *             the built-in default path is used.
     * @throws IOException if the conversion fails
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        data_format(path);
    }

}
